#  ██████╗ ██╗  ██╗██╗   ██╗██╗      ██████╗ ██████╗ ██╗  ██╗███████╗██████╗ ███████╗
#  ██╔══██╗██║  ██║╚██╗ ██╔╝██║     ██╔═══██╗██╔══██╗██║  ██║██╔════╝██╔══██╗██╔════╝
#  ██████╔╝███████║ ╚████╔╝ ██║     ██║   ██║██████╔╝███████║█████╗  ██████╔╝█████╗
#  ██╔═══╝ ██╔══██║  ╚██╔╝  ██║     ██║   ██║██╔═══╝ ██╔══██║██╔══╝  ██╔══██╗██╔══╝
#  ██║     ██║  ██║   ██║   ███████╗╚██████╔╝██║     ██║  ██║███████╗██║  ██║███████╗
#  ╚═╝     ╚═╝  ╚═╝   ╚═╝   ╚══════╝ ╚═════╝ ╚═╝     ╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝╚══════╝

## PHYLOPHERE: A Nextflow pipeline including a complete set
## of phylogenetic comparative tools and analyses for Phenome-Genome studies

### Github: https://github.com/nozerorma/caastools/nf-phylophere
### Author: Miguel Ramon (miguel.ramon@upf.edu)

#### File: phenotype_exploration.Rmd

Phenotype Exploration

This script allows for the visualization of trait distributions and phylogenetic patterns. The script includes contrast plots and phylogenetic trees to assess trait relationships.

Setup and Directory Configuration

This section configures the working environment, sets directories, and loads necessary functions and libraries.

# Call the setup function from commons.R
source("./obj/commons.R")
## [DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | malignant_prevalence | adult_necropsy_count | malignant_count | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | neoplasia_prevalence | LQ
## [DEBUG] seed_val = 1998
## [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/47/c58fe8ab044531b921244e210c9e62
## [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/47/c58fe8ab044531b921244e210c9e62/obj
## [DEBUG] resultsDir = data_exploration
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] "Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/47/c58fe8ab044531b921244e210c9e62"
## [1] "Results Directory: data_exploration"
## [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/47/c58fe8ab044531b921244e210c9e62
## [DEBUG] Results Directory: data_exploration
## [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration
## [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution
## [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots
## [DEBUG] asr_trees = data_exploration/1.Data-exploration/3.ASR_trees
## [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/4.Phylogenetic_distribution
## [DEBUG] ci_dir = data_exploration/1.Data-exploration/5.CI_overlaps
## [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17
## [1] "Loading trait data from: cancer_traits_processed-LQ.csv"
## [DEBUG] trait_path = cancer_traits_processed-LQ.csv
## [DEBUG] trait_df rows = 47, cols = 19
## [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ
## [DEBUG] trait_df species unique = 47
## [DEBUG] tree_path = science.abn7829_data_s4.nex.tree
## [DEBUG] tree tips = 236, nodes = 235
## [DEBUG] clade_name = primates
## [DEBUG] taxon_of_interest = family
## [DEBUG] trait = malignant_prevalence
## [DEBUG] n_trait = malignant_count
## [DEBUG] p_trait = adult_necropsy_count
## [DEBUG] has.p = TRUE, p missing = 0
## [DEBUG] has.n = TRUE, n missing = 0
## Warning: package 'ape' was built under R version 4.4.2
## 
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
## 
##     where
## [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv
## [DEBUG] tax_id_df rows = 1290, distinct taxa = 807
## [DEBUG] has.TAX_ID = TRUE
## [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0
## [DEBUG] normalized tax_id from merged columns, missing tax_id = 0
## [DEBUG] tree_ids rows = 40, missing tax_id = 0
## [DEBUG] common_tax_ids = 40
## [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39
## [DEBUG] trait_df after TAX_ID tree filter rows = 40
## [DEBUG] secondary_trait = neoplasia_prevalence
## [DEBUG] has.secondary = TRUE, missing = 0
## [DEBUG] branch_trait = LQ
## [DEBUG] has.branch = TRUE, missing = 0
setup_rmd()

# Debug helper (prints into HTML output)
if (is.null(getOption("phylo_debug"))) {
  options(phylo_debug = TRUE)
}
if (!exists("phylo_debug_log", envir = .GlobalEnv)) {
  phylo_debug_log <- character()
}
if (!exists("debug_log", inherits = TRUE)) {
  debug_log <- function(...) {
    msg <- sprintf(...)
    phylo_debug_log <<- c(phylo_debug_log, msg)
    cat("[DEBUG] ", msg, "\n", sep = "")
  }
}

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(readr)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggrepel)
library(ggtree)
## Warning: package 'ggtree' was built under R version 4.4.2
## ggtree v3.14.0 Learn more at https://yulab-smu.top/contribution-tree-data/
## 
## Please cite:
## 
## Guangchuang Yu. Using ggtree to visualize data on tree-like structures.
## Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
## 
##     rotate
## The following object is masked from 'package:tidyr':
## 
##     expand
library(ggnewscale)
library(ggstar)
library(colorspace)
library(treeio)
## Warning: package 'treeio' was built under R version 4.4.2
## treeio v1.30.0 Learn more at https://yulab-smu.top/contribution-tree-data/
## 
## Please cite:
## 
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
library(tidytree)
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## Guangchuang Yu.  Data Integration, Manipulation and Visualization of
## Phylogenetic Trees (1st edition). Chapman and Hall/CRC. 2022,
## doi:10.1201/9781003279242
## 
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## Attaching package: 'tidytree'
## The following object is masked from 'package:treeio':
## 
##     getNodeNum
## The following objects are masked from 'package:ape':
## 
##     drop.tip, keep.tip
## The following object is masked from 'package:stats':
## 
##     filter
library(phylolm)
library(ggtreeExtra)
## Warning: package 'ggtreeExtra' was built under R version 4.4.2
## ggtreeExtra v1.16.0 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## S Xu, Z Dai, P Guo, X Fu, S Liu, L Zhou, W Tang, T Feng, M Chen, L
## Zhan, T Wu, E Hu, Y Jiang, X Bo, G Yu. ggtreeExtra: Compact
## visualization of richly annotated phylogenetic data. Molecular Biology
## and Evolution. 2021, 38(9):4039-4042. doi: 10.1093/molbev/msab166
library(geiger)
## Loading required package: phytools
## Loading required package: maps
## 
## Attaching package: 'phytools'
## The following object is masked from 'package:treeio':
## 
##     read.newick
## 
## Attaching package: 'geiger'
## The following object is masked from 'package:tidytree':
## 
##     treedata
## The following object is masked from 'package:treeio':
## 
##     treedata
library(rphylopic)
## You are using rphylopic v.1.5.0. Please remember to credit PhyloPic contributors (hint: `get_attribution()`) and cite rphylopic in your work (hint: `citation("rphylopic")`).
# Objects
color_palette <- paste0(clade_name, "_palette") # Color palette for the clade
capitalized_taxon <- tools::toTitleCase(taxon_of_interest) # Capitalized taxon name
capitalized_trait <- tools::toTitleCase(gsub("_", " ", trait)) # Capitalized and deconvoluted trait name
capitalized_clade <- tools::toTitleCase(gsub("_", " ", clade_name)) # Capitalized and deconvoluted clade name

createDir(extreme_plots_dir)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/2.Extreme_plots
createDir(asr_trees)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/3.ASR_trees
createDir(phylo_distribution_dir)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/4.Phylogenetic_distribution

1. Data Import

This section imports trait summary data from the dataset exploration stage.

# Load summary stats from Dataset Exploration
stats_path <- file.path(species_distribution_dir, "trait_stats.csv")
if (!file.exists(stats_path)) {
  stop("Missing trait stats file: ", stats_path)
}
stats_df <- read.csv(stats_path, stringsAsFactors = FALSE)

1. Load Melted Trait Data

This section loads pre-processed trait data in a long (melted) format and adds common names for species.

# 1. LOAD STATS ------------------------------------------------------
# Excerpt of the trait stats data
head(stats_df)
##                species   family malignant_prevalence malignant_count
## 1      Alouatta_caraya Atelidae           0.03125000               1
## 2     Ateles_geoffroyi Atelidae           0.18604651               8
## 3    Callimico_goeldii  Cebidae           0.10144928               7
## 4 Callithrix_geoffroyi  Cebidae           0.03225806               1
## 5   Callithrix_jacchus  Cebidae           0.02439024               1
## 6     Cebuella_pygmaea  Cebidae           0.04878049               2
##   adult_necropsy_count neoplasia_prevalence        LQ  taxa_mean taxa_median
## 1                   32           0.12500000 0.9846315 0.10864826   0.1086483
## 2                   43           0.30232558 1.3561723 0.10864826   0.1086483
## 3                   69           0.21739130 1.0393871 0.04203321   0.0400000
## 4                   31           0.09677419 0.9029786 0.04203321   0.0400000
## 5                   41           0.02439024 1.2363178 0.04203321   0.0400000
## 6                   41           0.09756098 1.1559844 0.04203321   0.0400000
##      taxa_sd   taxa_q25   taxa_q75      value    g_mean   g_median      g_sd
## 1 0.10945766 0.06994913 0.14734738 0.03125000 0.0669974 0.04061856 0.0670716
## 2 0.10945766 0.06994913 0.14734738 0.18604651 0.0669974 0.04061856 0.0670716
## 3 0.03884852 0.00000000 0.05555556 0.10144928 0.0669974 0.04061856 0.0670716
## 4 0.03884852 0.00000000 0.05555556 0.03225806 0.0669974 0.04061856 0.0670716
## 5 0.03884852 0.00000000 0.05555556 0.02439024 0.0669974 0.04061856 0.0670716
## 6 0.03884852 0.00000000 0.05555556 0.04878049 0.0669974 0.04061856 0.0670716
##        g_q25     g_q75 outlier extreme_outlier global_label taxa_outlier
## 1 0.02224955 0.1003623  normal          normal       normal       normal
## 2 0.02224955 0.1003623  normal          normal high_extreme       normal
## 3 0.02224955 0.1003623  normal          normal high_extreme       normal
## 4 0.02224955 0.1003623  normal          normal       normal       normal
## 5 0.02224955 0.1003623  normal          normal       normal       normal
## 6 0.02224955 0.1003623  normal          normal       normal       normal
##   extreme_taxa_outlier   taxa_label
## 1               normal  low_extreme
## 2               normal high_extreme
## 3               normal high_extreme
## 4               normal       normal
## 5               normal       normal
## 6               normal       normal
stats_df <- stats_df %>%
  dplyr::mutate(
    value = .data[[trait]],
    taxa = .data[[taxon_of_interest]],
    common_name = gsub("_", " ", species)
  )

if(isTRUE(has.p)){
  stats_df <- stats_df %>%   
    mutate(p = .data[[p_trait]])
  debug_log("contrast_plot.f using p_trait = %s, missing p = %d", p_trait, sum(is.na(stats_df$p)))
}
## [DEBUG] contrast_plot.f using p_trait = adult_necropsy_count, missing p = 0
# Ensure species order matches phylogeny
ordered_species <-  tidytree::as_tibble(pruned_tree) %>%
  dplyr::arrange(desc(.data$node)) %>%
  # Remove internal nodes by filtering only rows where label is not NA
  dplyr::filter(!is.na(.data$label)) %>%
  dplyr::pull(.data$label)

print(ordered_species)
##  [1] "Galago_moholi"              "Galago_senegalensis"       
##  [3] "Nycticebus_coucang"         "Nycticebus_pygmaeus"       
##  [5] "Microcebus_murinus"         "Propithecus_coquereli"     
##  [7] "Varecia_variegata"          "Varecia_rubra"             
##  [9] "Lemur_catta"                "Eulemur_macaco"            
## [11] "Hylobates_lar"              "Gorilla_gorilla"           
## [13] "Pan_troglodytes"            "Colobus_guereza"           
## [15] "Trachypithecus_francoisi"   "Trachypithecus_cristatus"  
## [17] "Trachypithecus_auratus"     "Cercopithecus_neglectus"   
## [19] "Mandrillus_sphinx"          "Papio_hamadryas"           
## [21] "Theropithecus_gelada"       "Macaca_nigra"              
## [23] "Macaca_silenus"             "Macaca_fuscata"            
## [25] "Macaca_fascicularis"        "Ateles_geoffroyi"          
## [27] "Alouatta_caraya"            "Saimiri_sciureus"          
## [29] "Sapajus_apella"             "Saguinus_imperator"        
## [31] "Saguinus_midas"             "Saguinus_bicolor"          
## [33] "Saguinus_oedipus"           "Leontopithecus_chrysomelas"
## [35] "Leontopithecus_rosalia"     "Callithrix_jacchus"        
## [37] "Callithrix_geoffroyi"       "Mico_argentatus"           
## [39] "Cebuella_pygmaea"           "Callimico_goeldii"
stats_df$species <- factor(stats_df$species, levels = ordered_species)
debug_log("contrast_plot.f ordered_species = %d", length(ordered_species))
## [DEBUG] contrast_plot.f ordered_species = 40
# Define ordered taxa for plotting based on phylogeny order
ordered_taxa <- ordered_species %>%
  sapply(function(sp) {
    idx <- which(stats_df$species == sp)
    if (length(idx) == 0) return(NA_character_)
    stats_df$taxa[idx[1]]
  }) %>%
  unname() %>%
  na.omit() %>%
  unique()

print(ordered_taxa)
##  [1] "Galagidae"       "Lorisidae"       "Cheirogaleidae"  "Indriidae"      
##  [5] "Lemuridae"       "Hylobatidae"     "Hominidae"       "Cercopithecidae"
##  [9] "Atelidae"        "Cebidae"
# Arrange stats_df by taxa
stats_df <- stats_df %>%
  dplyr::mutate(taxa = factor(taxa, levels = ordered_taxa)) %>%
  dplyr::arrange(taxa, .by_group = TRUE)

3. Contrast Plots

This section generates contrast plots to visualize the trait across taxa, highlighting extreme values and outliers.

## PLOT FUNCTION --------------------------------------------------------------
### 1. Contrast plot function
contrast_plot.f <- function(df) {
  stopifnot(all(c("species", taxon_of_interest, trait) %in% names(df)))
  palette_values <- get_palette_values()
  debug_log("contrast_plot.f rows = %d, trait = %s, taxon = %s", nrow(df), trait, taxon_of_interest)
  debug_log("contrast_plot.f columns: %s", paste(names(df), collapse = ", "))
  fill_scale <- if (is.null(palette_values)) {
    ggplot2::scale_fill_discrete()
  } else {
    ggplot2::scale_fill_manual(values = palette_values)
  }
  color_scale <- if (is.null(palette_values)) {
    ggplot2::scale_color_discrete()
  } else {
    ggplot2::scale_color_manual(values = palette_values)
  }

  global_median <- df$g_median
  
  ggplot(df, aes(x = value, y = taxa)) +
    ggplot2::scale_y_discrete(limits = rev(ordered_taxa)) +
    ggplot2::geom_point(
      data = subset(df, global_label == "normal"),
      aes(shape = global_label, color = taxa), size = 5
    ) +
    ggplot2::geom_point(
      data = subset(df, global_label %in% c("low_extreme", "high_extreme")),
      aes(shape = global_label, color = taxa, fill = taxa), size = 5
    ) +
    ggplot2::scale_shape_manual(values = c("low_extreme" = 15, "normal" = 1, "high_extreme" = 17)) +
    ggplot2::scale_fill_manual(values = palette_values, breaks = 0) +
    ggplot2::scale_color_manual(values = palette_values, breaks = 0) +
    ggplot2::geom_vline(xintercept = global_median, linetype = "longdash", linewidth = 1.2, color = "salmon3") +
    ggplot2::stat_summary(fun = median, geom = "errorbar",
                          aes(xmax = after_stat(x), xmin = after_stat(x), y = taxa),
                          linewidth = 1.2, color = "skyblue4", alpha = 0.8) +
    ggplot2::labs(x = capitalized_trait, 
                  y = paste(capitalized_taxon, "-", capitalized_clade), 
                  title = paste("Trait differential plot for trait:", capitalized_trait, "in", clade_name),
                  caption = "Global median shown as a longdash red line. Per taxa median shown as skyblue error bars. 
                  Extreme values were identified based on global thresholds (top and bottom quantiles).") +
    ggrepel::geom_text_repel(
      data = subset(df, global_label %in% c("low_extreme", "high_extreme")),
      aes(label = species), size = 5, hjust = 0, vjust = 0, family = "Inter",
      segment.size = 0.3, nudge_x = 0.010, nudge_y = 0.5, max.overlaps = Inf,
      force = 1, direction = "y", max.iter = 10000, label.padding = 20,
      min.segment.length = 0.06, seed = seed_val, show.legend = FALSE
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      plot.title = ggplot2::element_text(size = 20, hjust = 0.5, margin = ggplot2::margin(t = 20), face = "bold"),
      axis.text.y = ggplot2::element_text(size = 15),
      axis.title.x = ggplot2::element_text(size = 15, hjust = 0.5, margin = ggplot2::margin(t = 20, b = 20)),
      axis.title.y = ggplot2::element_text(size = 15, angle = 90, hjust = 0.5, margin = ggplot2::margin(l = 20, r = 20)),
      caption = ggplot2::element_text(size = 12, hjust = 0.5, margin = ggplot2::margin(t = 20)),
      legend.text = ggplot2::element_text(size = 15),
      legend.position = "bottom"
    )
}

# 3. CONTRAST PLOTS  ------------------------------------------------------
plot_ready <- !is.null(stats_df) && nrow(stats_df) > 0
required_cols <- c("species", taxon_of_interest, trait)
plot_ready <- plot_ready && all(required_cols %in% names(stats_df))

if (!plot_ready) {
  message("Skipping contrast plot: missing or empty trait stats.")
} else {
  contrast_graph <- contrast_plot.f(stats_df)
  ggplot2::ggsave(
    file.path(extreme_plots_dir, paste0(trait, "_contrast_plot.png")),
    contrast_graph, device = "png", width = 15, height = 10, dpi = "retina"
  )
  contrast_graph
}
## [DEBUG] contrast_plot.f rows = 40, trait = malignant_prevalence, taxon = family
## [DEBUG] contrast_plot.f columns: species, family, malignant_prevalence, malignant_count, adult_necropsy_count, neoplasia_prevalence, LQ, taxa_mean, taxa_median, taxa_sd, taxa_q25, taxa_q75, value, g_mean, g_median, g_sd, g_q25, g_q75, outlier, extreme_outlier, global_label, taxa_outlier, extreme_taxa_outlier, taxa_label, taxa, common_name, p
## Warning in ggrepel::geom_text_repel(data = subset(df, global_label %in% :
## Ignoring unknown parameters: `label.padding`
## Warning in plot_theme(plot): The `caption` theme element is not defined in the element hierarchy.
## The `caption` theme element is not defined in the element hierarchy.
## The `caption` theme element is not defined in the element hierarchy.

4. Violin Plots

This section generates violin plots to show trait distributions across taxa.

### PLOT FUNCTION --------------------------------------------------------------
violin_extremes.f <- function(df, trait, taxon_of_interest) {
  stopifnot(all(c("species", taxon_of_interest, trait) %in% names(df)))
  palette_values <- get_palette_values()
  debug_log("violin_extremes.f rows = %d, trait = %s, taxon = %s", nrow(df), trait, taxon_of_interest)
  fill_scale <- if (is.null(palette_values)) {
    ggplot2::scale_fill_discrete()
  } else {
    ggplot2::scale_fill_manual(values = palette_values)
  }
  dark_palette_values <- get_palette_values(paste0("dark_", color_palette))
  fill_scale_dark <- if (is.null(dark_palette_values)) {
    fill_scale
  } else {
    ggplot2::scale_fill_manual(values = dark_palette_values)
  }
  color_scale_dark <- if (is.null(dark_palette_values)) {
    ggplot2::scale_color_discrete()
  } else {
    ggplot2::scale_color_manual(values = dark_palette_values)
  }

  # Ensure species order matches phylogeny
  ordered_species <- pruned_tree$tip.label
  df$species <- factor(df$species, levels = ordered_species)
  debug_log("violin_extremes.f ordered_species = %d", length(ordered_species))
  
  # Define ordered taxa for plotting using global phylogeny order if available
  if (!exists("ordered_taxa", inherits = TRUE) || length(ordered_taxa) == 0) {
    ordered_taxa <- unique(pruned_tree$tip.label %>%
      sapply(function(sp) df$taxa[df$species == sp]) %>%
      na.omit())
    if (length(ordered_taxa) == 0) {
      ordered_taxa <- unique(df$taxa[!is.na(df$taxa)])
    }
  }
  
  # Calculate median value for reference line
  median_value <- median(df$value, na.rm = TRUE)
  
  # Create violin plot
  plot <- ggplot2::ggplot(data = df, aes(x = value, y = taxa)) +
    ggplot2::scale_y_discrete(limits = rev(ordered_taxa)) +
    ggplot2::geom_violin(
      aes(fill = taxa), color = "black", width = 0.5, trim = TRUE, scale = "width", alpha = 0.15
    ) +
    fill_scale_dark +
    ggplot2::geom_point(
      data = subset(df, outlier == "normal"),
      aes(shape = global_label, color = taxa),
      size = 3, position = ggplot2::position_jitter(height = 0.25)
    ) +
    ggplot2::geom_point(
      data = subset(df, outlier != "normal"),
      aes(color = taxa, fill = taxa),
      shape = 21, size = 3, stroke = 0.8, position = ggplot2::position_jitter(height = 0.25)
    ) +
    ggplot2::scale_shape_manual(values = c("low_extreme" = 15, "normal" = 1, "high_extreme" = 17)) +
    color_scale_dark +
    ggplot2::geom_vline(
      aes(xintercept = median_value), linewidth = 1, color = "salmon3", alpha = 0.5
    ) +
    ggplot2::stat_summary(
      fun = median, geom = "errorbar",
      aes(xmax = after_stat(x), xmin = after_stat(x)),
      linewidth = 1.5, color = "black", alpha = 0.6, width = 0.75
    ) +
    ggplot2::labs(x = capitalized_trait) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      axis.text.y = ggplot2::element_text(size = 17, hjust = 0.5, face = "bold"),
      axis.title.x = ggplot2::element_text(size = 17, hjust = 0.5, face = "bold", margin = ggplot2::margin(t = 20, b = 20)),
      axis.title.y = ggplot2::element_blank(),
      legend.text = ggplot2::element_text(size = 15),
      legend.position = "none"
    )
  
  return(plot)
}

if (!plot_ready) {
  message("Skipping violin plot: missing or empty trait stats.")
} else {
  violin_plot_path <- file.path(extreme_plots_dir, paste0(trait, "_violin_plot.png"))
  violin_graph <- violin_extremes.f(stats_df, trait, taxon_of_interest)
  ggplot2::ggsave(
    violin_plot_path,
    violin_graph, device = "png", width = 7, height = 10, dpi = "retina"
  )
  violin_graph
}
## [DEBUG] violin_extremes.f rows = 40, trait = malignant_prevalence, taxon = family
## [DEBUG] violin_extremes.f ordered_species = 40
## Warning: Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.

5. Ancestral State Reconstruction (ASR) Plots

This section generates ASR plots to visualize the evolutionary history of the trait across the phylogenetic

# 4. ASR PLOTS  ------------------------------------------------------
asr_tree.f <- function(df, tree, trait, taxon_of_interest) {
  
  # --- Trait vector ---
  trait_vec <- df[[trait]]
  names(trait_vec) <- df$species
  trait_vec <- trait_vec[!is.na(trait_vec) & is.finite(trait_vec)]
  if(length(trait_vec) < 3) stop("Need >=3 non-missing species.")
  debug_log("asr_tree.f input rows = %d, trait_vec = %d", nrow(df), length(trait_vec))
  

  tree <- ape::drop.tip(tree, setdiff(tree$tip.label, names(trait_vec)))
  tree <- ape::reorder.phylo(tree, "cladewise")
  Ntip <- length(tree$tip.label)
  Nnode <- tree$Nnode
  debug_log("asr_tree.f tree tips after prune = %d, nodes = %d", Ntip, Nnode)
  node_indices <- (Ntip+1):(Ntip+Nnode)

  # Sanitize trait vector to match pruned tree
  trait_vec <- trait_vec[tree$tip.label]

  # Also sanitize the data frame
  df <- df[df$species %in% tree$tip.label, ]
  df$species <- factor(df$species, levels = tree$tip.label)

  debug_log("asr_tree.f trait_vec after prune = %d", length(trait_vec))
  debug_log("asr_tree.f df after prune = %d", nrow(df))
  
  # --- Fit BM / lambda ---
  fit_model <- function(tree, trait, model=c("BM","lambda")) {
    model <- match.arg(model)
    df <- data.frame(trait=trait, species=names(trait))
    fit <- phylolm(trait~1, data=df, phy=tree, model=ifelse(model=="BM","BM","lambda"))
    aic <- fit$aic; k <- fit$p
    aicc <- aic + (2*k*(k+1))/(length(trait)-k-1)
    list(lnL=fit$logLik, sigma2=fit$sigma2, lambda=fit$optpar, aicc=aicc, raw=fit)
  }
  
  fit_bm <- fit_model(tree, trait_vec, "BM")
  fit_lambda <- fit_model(tree, trait_vec, "lambda")
  debug_log("asr_tree.f fit_bm aicc = %.3f, fit_lambda aicc = %.3f", fit_bm$aicc, fit_lambda$aicc)
  
  delta_aicc <- fit_bm$aicc - fit_lambda$aicc
  chosen_model <- ifelse(fit_lambda$aicc < fit_bm$aicc, "lambda", "BM")
  if(abs(delta_aicc)<2 && !is.null(fit_lambda$lambda) && abs(fit_lambda$lambda-1)<0.05) chosen_model <- "BM"
  debug_log("asr_tree.f chosen_model = %s", chosen_model)
  
  # --- Rescale tree & ASR ---
  if(chosen_model=="lambda"){
    lambda_hat <- fit_lambda$lambda
    if (is.null(lambda_hat) || is.na(lambda_hat)) {
      lambda_hat <- 1
    }
    debug_log("asr_tree.f lambda_hat = %.4f", lambda_hat)
    sigma2_used <- fit_lambda$sigma2
    rescaled_tree <- phytools::rescale(tree,"lambda",lambda=lambda_hat)
  } else {
    sigma2_used <- fit_bm$sigma2
    rescaled_tree <- phytools::rescale(tree,"BM")
  }
  debug_log("asr_tree.f sigma2_used = %.6f", sigma2_used)
  
  # Recalculate node_indices for rescaled tree (structure may be same, but let's be safe)
  Ntip_rescaled <- length(rescaled_tree$tip.label)
  Nnode_rescaled <- rescaled_tree$Nnode
  node_indices <- (Ntip_rescaled+1):(Ntip_rescaled+Nnode_rescaled)

  # Get point estimates via ACE with fallback to the original tree
  ace_try <- function(tree_obj) {
    tryCatch(
      ape::ace(x = trait_vec, phy = tree_obj, type = "continuous", method = "REML"),
      error = function(e) tryCatch(
        ape::ace(x = trait_vec, phy = tree_obj, type = "continuous", method = "ML"),
        error = function(e2) NULL
      )
    )
  }
  ace_res <- ace_try(rescaled_tree)
  model_used <- chosen_model
  if (is.null(ace_res)) {
    message("ACE failed on rescaled tree; retrying on original tree.")
    ace_res <- ace_try(tree)
    if (is.null(ace_res)) {
      message("ACE failed on original tree; falling back to fastAnc without CIs.")
      anc_est <- tryCatch(
        phytools::fastAnc(rescaled_tree, trait_vec),
        error = function(e) NULL
      )
      if (is.null(anc_est)) {
        debug_log("asr_tree.f fastAnc failed")
      } else {
        debug_log("asr_tree.f fastAnc ok, n = %d", length(anc_est))
      }
      if (is.null(anc_est)) {
        stop("ACE and fastAnc failed.")
      }
      ace_res <- list(
        ace = anc_est,
        CI95 = cbind(anc_est, anc_est)
      )
    } else {
      rescaled_tree <- tree
      Ntip_rescaled <- length(rescaled_tree$tip.label)
      Nnode_rescaled <- rescaled_tree$Nnode
      node_indices <- (Ntip_rescaled+1):(Ntip_rescaled+Nnode_rescaled)
      model_used <- "BM"
    }
  }
  
  node_est <- ace_res$ace
  tip_est <- trait_vec[rescaled_tree$tip.label]
  debug_log("asr_tree.f node_est = %d, tip_est = %d", length(node_est), length(tip_est))
  
  # --- Build node and tip tables ---
  # Use analytical CIs from ace
  node_ci <- ace_res$CI95
  node_df <- data.frame(
    node = node_indices,
    estimate = as.numeric(node_est[as.character(node_indices)]),
    ci_lower = as.numeric(node_ci[,1]),
    ci_upper = as.numeric(node_ci[,2]),
    stringsAsFactors = FALSE
  )
  # Tips don't have analytical CIs in ace output, use point estimates
  tip_df <- data.frame(
    tip = rescaled_tree$tip.label,
    estimate = as.numeric(tip_est[rescaled_tree$tip.label]),
    ci_lower = as.numeric(tip_est[rescaled_tree$tip.label]),
    ci_upper = as.numeric(tip_est[rescaled_tree$tip.label]),
    immediate_parent = NA,
    cumulative_derived = NA,
    direction = NA,
    parent_node_id = NA,
    parent_ci_lower = NA,
    parent_ci_upper = NA,
    stringsAsFactors = FALSE
  )
  
  # --- Derivedness tests: immediate-parent and cumulative-ancestors ---
  # Build parent map (use rescaled tree dimensions)
  parent_map <- integer(Ntip_rescaled + Nnode_rescaled)
  parent_map[] <- NA_integer_
  for (j in seq_len(nrow(rescaled_tree$edge))) {
    parent_map[rescaled_tree$edge[j,2]] <- rescaled_tree$edge[j,1]
  }
  
  # Test tips for derivedness
  for (i in seq_len(nrow(tip_df))) {
    tip_label <- tip_df$tip[i]
    tip_est_val <- tip_df$estimate[i]
    tip_idx <- which(rescaled_tree$tip.label == tip_label)
    parent_id <- parent_map[tip_idx]
    
    if (!is.na(parent_id)) {
      # Store parent node ID
      tip_df$parent_node_id[i] <- parent_id
      
      # Get parent node CI
      if (parent_id <= Ntip_rescaled) {
        parent_row <- tip_df[tip_df$tip == rescaled_tree$tip.label[parent_id], ]
      } else {
        parent_row <- node_df[node_df$node == parent_id, ]
      }
      parent_ci_low <- parent_row$ci_lower
      parent_ci_high <- parent_row$ci_upper
      
      # Immediate-parent derivedness test
      tip_df$immediate_parent[i] <- (tip_est_val > parent_ci_high) || (tip_est_val < parent_ci_low)
      
      if (tip_est_val > parent_ci_high) {
        tip_df$direction[i] <- "up"
      } else if (tip_est_val < parent_ci_low) {
        tip_df$direction[i] <- "down"
      } else {
        tip_df$direction[i] <- "none"
      }
      
      if (tip_df$immediate_parent[i]) {
        tip_df$parent_ci_lower[i] <- parent_ci_low
        tip_df$parent_ci_upper[i] <- parent_ci_high
      }
    }
    
    # Cumulative derivedness: derived from ALL ancestors
    anc_list <- integer(0)
    cur <- tip_idx
    while (!is.na(parent_map[cur])) {
      cur <- parent_map[cur]
      anc_list <- c(anc_list, cur)
    }
    anc_nodes <- anc_list[anc_list > Ntip_rescaled]
    
    if (length(anc_nodes) == 0) {
      tip_df$cumulative_derived[i] <- FALSE
    } else {
      anc_rows <- node_df[node_df$node %in% anc_nodes, ]
      if (nrow(anc_rows) == 0) {
        tip_df$cumulative_derived[i] <- NA
      } else {
        tip_df$cumulative_derived[i] <- all((tip_est_val > anc_rows$ci_upper) | (tip_est_val < anc_rows$ci_lower))
      }
    }
  }
  
  # --- Plot ---
  cont_obj <- phytools::contMap(rescaled_tree, trait_vec, plot=FALSE)
  debug_log("asr_tree.f contMap range = [%.4f, %.4f]", cont_obj$lims[1], cont_obj$lims[2])
  cont_obj <- phytools::setMap(cont_obj, colors=colorRampPalette(c("white","salmon3"))(100))
  h <- max(phytools::nodeHeights(cont_obj$tree))

  par(mar=c(5,5,10,7), oma=c(0,0,2,0))

  plot(cont_obj, fsize=1.8, lwd=6, res=320, legend=FALSE,
      xlim=c(-0.2*h, 2*h),
      cex.main=2.5)

  lwd_bar <- 10
  root_node_idx <- node_indices[1]
  root_est_str <- sprintf("%.2f (%.2f-%.2f)", 
                          node_df$estimate[1], 
                          node_df$ci_lower[1], 
                          node_df$ci_upper[1])
  
  phytools::add.color.bar(
    Ntip_rescaled - 1, 
    cont_obj$cols, 
    title = paste0(trait, " (", model_used, ")"),
    subtitle = "",
    lims=NULL,
    lwd=lwd_bar, 
    direction="upwards",
    x=-0.2*h, 
    y=1,
    prompt=FALSE, 
    fsize=1.5
  )
  
  # Add ticks and labels to color bar
  tick_x <- -0.2*h + lwd_bar / 20
  lines(x = rep(tick_x, 2), y = c(1, Ntip_rescaled))
  nticks <- 10
  
  Y <- seq(1, Ntip_rescaled, length.out = nticks)
  X <- cbind(rep(tick_x, nticks), rep(tick_x + 0.02*h, nticks))
  ticks <- seq(cont_obj$lims[1], cont_obj$lims[2], length.out = nticks)
  text(x = X[, 2], y = Y, labels = round(ticks, 3), pos = 4, cex = 1.5)
  
  # Add root annotation
  text(x = 0.02*h, y = Ntip_rescaled/1.45, labels = root_est_str, pos = 4, cex = 1.5, col = "black")
  
  # Collect parent nodes that have derived tips
  parent_nodes_with_derived_tips <- unique(tip_df$parent_node_id[tip_df$immediate_parent & !is.na(tip_df$parent_node_id)])
  
  # Add labels ONLY for parent nodes of derived tips
  for (parent_node in parent_nodes_with_derived_tips) {
    if (parent_node > Ntip_rescaled) {
      # It's an internal node
      parent_row <- node_df[node_df$node == parent_node, ]
      if (nrow(parent_row) > 0) {
        parent_est <- parent_row$estimate
        parent_ci_low <- parent_row$ci_lower
        parent_ci_high <- parent_row$ci_upper

        # Create label: estimate (ci_lower-ci_upper)
        parent_label <- sprintf("%.2f (%.2f-%.2f)", parent_est, parent_ci_low, parent_ci_high)

        # Display at the parent node
        ape::nodelabels(text = parent_label, node = parent_node,
                        frame = "none", cex = 1.5, col = "black", adj = c(-0.25, 0.4))
      }
    }
  }
    
  # Add tip symbols and labels
  for(i in seq_len(Ntip_rescaled)){
    tip_pos <- i
    
    if(tip_df$immediate_parent[i]){
      # Tip symbol for derived tips
      pch_val <- ifelse(tip_df$direction[i]=="up", 24, 25)
      bg_val <- ifelse(tip_df$direction[i]=="up", "salmon3", "skyblue3")
      ape::tiplabels(pch=pch_val, bg=bg_val, tip=tip_pos, cex=2, offset=58)
      
      # Tip value (larger to match parent node emphasis)
      ape::tiplabels(text=sprintf("%.2f", tip_df$estimate[i]), 
                    tip=tip_pos, frame="none", cex=1.5, offset=40)
      
      # Connection line from tip to tip value
      segments(x0 = h, y0 = tip_pos-0.4, x1 = h + 70, y1 = tip_pos-0.4, 
              col = "gray50", lty = 2)
    }
    
    # Cumulative derived marker
    if(tip_df$cumulative_derived[i]){
      ape::tiplabels(pch=22, bg="forestgreen", tip=tip_pos, cex=2, offset=62)
    }
  }
  
  mtext(paste0("Symbols: ▲ Tip Up, ▼ Tip Down, ■ Cumulatively derived; ",
              "Parent node values shown for derived tips."),
        side=3, line=-2, cex=1.2, adj=0.35)
  
  
  # --- Return ---
  list(
    chosen_model=chosen_model,
    node_table=node_df,
    tip_table=tip_df,
    sigma2_used=sigma2_used,
    ace=ace_res,
    rescaled_tree=rescaled_tree
  )
}

if (!plot_ready) {
  message("Skipping ASR plot: missing or empty trait stats.")
} else {
  asr_plot_path <- file.path(asr_trees, paste0(trait, "_asr_tree.png"))
  tryCatch(
    {
      grDevices::png(
        filename = asr_plot_path,
        width = 17, height = 20, res = 320, bg = "white", units = "in"
      )
      asr_result <- asr_tree.f(stats_df, pruned_tree, trait, taxon_of_interest)
      grDevices::dev.off()
    },
    error = function(e) {
      message("ASR plot file failed: ", e$message)
      try(grDevices::dev.off(), silent = TRUE)
    }
  )
  tryCatch(
    asr_tree.f(stats_df, pruned_tree, trait, taxon_of_interest),
    error = function(e) message("ASR report plot failed: ", e$message)
  )
}
## [DEBUG] asr_tree.f input rows = 40, trait_vec = 40
## [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39
## [DEBUG] asr_tree.f trait_vec after prune = 40
## [DEBUG] asr_tree.f df after prune = 40
## [DEBUG] asr_tree.f fit_bm aicc = -92.471, fit_lambda aicc = -110.169
## [DEBUG] asr_tree.f chosen_model = lambda
## [DEBUG] asr_tree.f lambda_hat = 0.7224
## [DEBUG] asr_tree.f sigma2_used = 0.000090
## [DEBUG] asr_tree.f node_est = 39, tip_est = 40
## [DEBUG] asr_tree.f contMap range = [0.0000, 0.2456]
## [DEBUG] asr_tree.f input rows = 40, trait_vec = 40
## [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39
## [DEBUG] asr_tree.f trait_vec after prune = 40
## [DEBUG] asr_tree.f df after prune = 40
## [DEBUG] asr_tree.f fit_bm aicc = -92.471, fit_lambda aicc = -110.169
## [DEBUG] asr_tree.f chosen_model = lambda
## [DEBUG] asr_tree.f lambda_hat = 0.7224
## [DEBUG] asr_tree.f sigma2_used = 0.000090
## [DEBUG] asr_tree.f node_est = 39, tip_est = 40
## [DEBUG] asr_tree.f contMap range = [0.0000, 0.2456]

## $chosen_model
## [1] "lambda"
## 
## $node_table
##    node   estimate      ci_lower   ci_upper
## 1    41 0.09896118  0.0359883316 0.16193403
## 2    42 0.06949767  0.0215066960 0.11748865
## 3    43 0.06363512  0.0232715227 0.10399872
## 4    44 0.05826366  0.0198068423 0.09672048
## 5    45 0.05309657  0.0193881390 0.08680501
## 6    46 0.05414984  0.0206629039 0.08763679
## 7    47 0.05333957  0.0189048606 0.08777427
## 8    48 0.04069739  0.0054445329 0.07595025
## 9    49 0.04004223  0.0041580455 0.07592641
## 10   50 0.03797845 -0.0001727995 0.07612969
## 11   51 0.06978791  0.0269167659 0.11265906
## 12   52 0.04002522  0.0035838674 0.07646658
## 13   53 0.03988819  0.0035094876 0.07626689
## 14   54 0.03746653 -0.0002424172 0.07517548
## 15   55 0.05495968  0.0130575386 0.09686182
## 16   56 0.07387743  0.0270006677 0.12075419
## 17   57 0.06164196  0.0167898187 0.10649411
## 18   58 0.04432432  0.0052860151 0.08336262
## 19   59 0.03772425  0.0002659611 0.07518254
## 20   60 0.03231648 -0.0025244584 0.06715742
## 21   61 0.02710281 -0.0084506480 0.06265626
## 22   62 0.02525672 -0.0122571101 0.06277054
## 23   63 0.02726498 -0.0100980963 0.06462806
## 24   64 0.03128019 -0.0051589479 0.06771933
## 25   65 0.02845119 -0.0112884218 0.06819080
## 26   66 0.04454089  0.0022043860 0.08687740
## 27   67 0.04595211  0.0049526727 0.08695154
## 28   68 0.04184465  0.0007973317 0.08289196
## 29   69 0.06559742  0.0161737022 0.11502113
## 30   70 0.04593903 -0.0060722109 0.09795028
## 31   71 0.11395202  0.0571913881 0.17071264
## 32   72 0.13774961  0.0865739033 0.18892531
## 33   73 0.14656711  0.0960390225 0.19709520
## 34   74 0.14833443  0.0965408725 0.20012798
## 35   75 0.15101948  0.1009994977 0.20103946
## 36   76 0.13876466  0.0865814334 0.19094789
## 37   77 0.10330135  0.0472483703 0.15935432
## 38   78 0.13294787  0.0821360335 0.18375971
## 39   79 0.06294624  0.0132618858 0.11263058
## 
## $tip_table
##                           tip    estimate    ci_lower    ci_upper
## 1           Callimico_goeldii 0.101449275 0.101449275 0.101449275
## 2            Cebuella_pygmaea 0.048780488 0.048780488 0.048780488
## 3             Mico_argentatus 0.000000000 0.000000000 0.000000000
## 4        Callithrix_geoffroyi 0.032258065 0.032258065 0.032258065
## 5          Callithrix_jacchus 0.024390244 0.024390244 0.024390244
## 6      Leontopithecus_rosalia 0.055555556 0.055555556 0.055555556
## 7  Leontopithecus_chrysomelas 0.117647059 0.117647059 0.117647059
## 8            Saguinus_oedipus 0.074626866 0.074626866 0.074626866
## 9            Saguinus_bicolor 0.040000000 0.040000000 0.040000000
## 10             Saguinus_midas 0.000000000 0.000000000 0.000000000
## 11         Saguinus_imperator 0.000000000 0.000000000 0.000000000
## 12             Sapajus_apella 0.000000000 0.000000000 0.000000000
## 13           Saimiri_sciureus 0.051724138 0.051724138 0.051724138
## 14            Alouatta_caraya 0.031250000 0.031250000 0.031250000
## 15           Ateles_geoffroyi 0.186046512 0.186046512 0.186046512
## 16        Macaca_fascicularis 0.000000000 0.000000000 0.000000000
## 17             Macaca_fuscata 0.023255814 0.023255814 0.023255814
## 18             Macaca_silenus 0.029411765 0.029411765 0.029411765
## 19               Macaca_nigra 0.027777778 0.027777778 0.027777778
## 20       Theropithecus_gelada 0.019230769 0.019230769 0.019230769
## 21            Papio_hamadryas 0.009708738 0.009708738 0.009708738
## 22          Mandrillus_sphinx 0.044444444 0.044444444 0.044444444
## 23    Cercopithecus_neglectus 0.035714286 0.035714286 0.035714286
## 24     Trachypithecus_auratus 0.037037037 0.037037037 0.037037037
## 25   Trachypithecus_cristatus 0.000000000 0.000000000 0.000000000
## 26   Trachypithecus_francoisi 0.100000000 0.100000000 0.100000000
## 27            Colobus_guereza 0.041237113 0.041237113 0.041237113
## 28            Pan_troglodytes 0.010526316 0.010526316 0.010526316
## 29            Gorilla_gorilla 0.029850746 0.029850746 0.029850746
## 30              Hylobates_lar 0.159090909 0.159090909 0.159090909
## 31             Eulemur_macaco 0.200000000 0.200000000 0.200000000
## 32                Lemur_catta 0.131147541 0.131147541 0.131147541
## 33              Varecia_rubra 0.135593220 0.135593220 0.135593220
## 34          Varecia_variegata 0.171052632 0.171052632 0.171052632
## 35      Propithecus_coquereli 0.080000000 0.080000000 0.080000000
## 36         Microcebus_murinus 0.245614035 0.245614035 0.245614035
## 37        Nycticebus_pygmaeus 0.224489796 0.224489796 0.224489796
## 38         Nycticebus_coucang 0.083333333 0.083333333 0.083333333
## 39        Galago_senegalensis 0.015151515 0.015151515 0.015151515
## 40              Galago_moholi 0.062500000 0.062500000 0.062500000
##    immediate_parent cumulative_derived direction parent_node_id parent_ci_lower
## 1              TRUE              FALSE        up             47    0.0189048606
## 2             FALSE              FALSE      none             49              NA
## 3              TRUE               TRUE      down             49    0.0041580455
## 4             FALSE              FALSE      none             50              NA
## 5             FALSE              FALSE      none             50              NA
## 6             FALSE              FALSE      none             51              NA
## 7              TRUE              FALSE        up             51    0.0269167659
## 8             FALSE              FALSE      none             53              NA
## 9             FALSE              FALSE      none             54              NA
## 10            FALSE              FALSE      none             54              NA
## 11             TRUE               TRUE      down             52    0.0035838674
## 12             TRUE               TRUE      down             55    0.0130575386
## 13            FALSE              FALSE      none             55              NA
## 14            FALSE              FALSE      none             56              NA
## 15             TRUE               TRUE        up             56    0.0270006677
## 16            FALSE              FALSE      none             62              NA
## 17            FALSE              FALSE      none             62              NA
## 18            FALSE              FALSE      none             63              NA
## 19            FALSE              FALSE      none             63              NA
## 20            FALSE              FALSE      none             65              NA
## 21            FALSE              FALSE      none             65              NA
## 22            FALSE              FALSE      none             64              NA
## 23            FALSE              FALSE      none             59              NA
## 24            FALSE              FALSE      none             68              NA
## 25             TRUE               TRUE      down             68    0.0007973317
## 26             TRUE              FALSE        up             67    0.0049526727
## 27            FALSE              FALSE      none             66              NA
## 28            FALSE              FALSE      none             70              NA
## 29            FALSE              FALSE      none             70              NA
## 30             TRUE              FALSE        up             69    0.0161737022
## 31            FALSE              FALSE      none             74              NA
## 32            FALSE              FALSE      none             74              NA
## 33            FALSE              FALSE      none             75              NA
## 34            FALSE              FALSE      none             75              NA
## 35             TRUE              FALSE      down             76    0.0865814334
## 36             TRUE               TRUE        up             76    0.0865814334
## 37             TRUE               TRUE        up             78    0.0821360335
## 38            FALSE              FALSE      none             78              NA
## 39            FALSE              FALSE      none             79              NA
## 40            FALSE              FALSE      none             79              NA
##    parent_ci_upper
## 1       0.08777427
## 2               NA
## 3       0.07592641
## 4               NA
## 5               NA
## 6               NA
## 7       0.11265906
## 8               NA
## 9               NA
## 10              NA
## 11      0.07646658
## 12      0.09686182
## 13              NA
## 14              NA
## 15      0.12075419
## 16              NA
## 17              NA
## 18              NA
## 19              NA
## 20              NA
## 21              NA
## 22              NA
## 23              NA
## 24              NA
## 25      0.08289196
## 26      0.08695154
## 27              NA
## 28              NA
## 29              NA
## 30      0.11502113
## 31              NA
## 32              NA
## 33              NA
## 34              NA
## 35      0.19094789
## 36      0.19094789
## 37      0.18375971
## 38              NA
## 39              NA
## 40              NA
## 
## $sigma2_used
## [1] 9.018698e-05
## 
## $ace
## 
##     Ancestral Character Estimation
## 
## Call: ape::ace(x = trait_vec, phy = tree_obj, type = "continuous", 
##     method = "REML")
## 
##     Residual log-likelihood: 343.333 
## 
## $ace
##         41         42         43         44         45         46         47 
## 0.09896118 0.06949767 0.06363512 0.05826366 0.05309657 0.05414984 0.05333957 
##         48         49         50         51         52         53         54 
## 0.04069739 0.04004223 0.03797845 0.06978791 0.04002522 0.03988819 0.03746653 
##         55         56         57         58         59         60         61 
## 0.05495968 0.07387743 0.06164196 0.04432432 0.03772425 0.03231648 0.02710281 
##         62         63         64         65         66         67         68 
## 0.02525672 0.02726498 0.03128019 0.02845119 0.04454089 0.04595211 0.04184465 
##         69         70         71         72         73         74         75 
## 0.06559742 0.04593903 0.11395202 0.13774961 0.14656711 0.14833443 0.15101948 
##         76         77         78         79 
## 0.13876466 0.10330135 0.13294787 0.06294624 
## 
## $sigma2
## [1] 8.970461e-05 1.113633e-04
## 
## $CI95
##             [,1]       [,2]
## 41  0.0359883316 0.16193403
## 42  0.0215066960 0.11748865
## 43  0.0232715227 0.10399872
## 44  0.0198068423 0.09672048
## 45  0.0193881390 0.08680501
## 46  0.0206629039 0.08763679
## 47  0.0189048606 0.08777427
## 48  0.0054445329 0.07595025
## 49  0.0041580455 0.07592641
## 50 -0.0001727995 0.07612969
## 51  0.0269167659 0.11265906
## 52  0.0035838674 0.07646658
## 53  0.0035094876 0.07626689
## 54 -0.0002424172 0.07517548
## 55  0.0130575386 0.09686182
## 56  0.0270006677 0.12075419
## 57  0.0167898187 0.10649411
## 58  0.0052860151 0.08336262
## 59  0.0002659611 0.07518254
## 60 -0.0025244584 0.06715742
## 61 -0.0084506480 0.06265626
## 62 -0.0122571101 0.06277054
## 63 -0.0100980963 0.06462806
## 64 -0.0051589479 0.06771933
## 65 -0.0112884218 0.06819080
## 66  0.0022043860 0.08687740
## 67  0.0049526727 0.08695154
## 68  0.0007973317 0.08289196
## 69  0.0161737022 0.11502113
## 70 -0.0060722109 0.09795028
## 71  0.0571913881 0.17071264
## 72  0.0865739033 0.18892531
## 73  0.0960390225 0.19709520
## 74  0.0965408725 0.20012798
## 75  0.1009994977 0.20103946
## 76  0.0865814334 0.19094789
## 77  0.0472483703 0.15935432
## 78  0.0821360335 0.18375971
## 79  0.0132618858 0.11263058
## 
## 
## $rescaled_tree
## 
## Phylogenetic tree with 40 tips and 39 internal nodes.
## 
## Tip labels:
##   Callimico_goeldii, Cebuella_pygmaea, Mico_argentatus, Callithrix_geoffroyi, Callithrix_jacchus, Leontopithecus_rosalia, ...
## 
## Rooted; includes branch length(s).

6. Fan Plot for Phylogenetic Distribution

This section creates a fan-shaped phylogenetic tree.

# 5. PHYLOGENETIC DISTRIBUTION PLOT  ------------------------------------------------------
palette_values <- get_palette_values()
fill_scale <- if (is.null(palette_values)) {
  ggplot2::scale_fill_discrete()
} else {
  ggplot2::scale_fill_manual(values = palette_values)
}
color_scale <- if (is.null(palette_values)) {
  ggplot2::scale_color_discrete()
} else {
  ggplot2::scale_color_manual(values = palette_values)
}

# Create base phylogenetic tree in fan layout
base_fan_tree <- ggtree(pruned_tree,
                        layout="fan",
                        open.angle=15, 
                        size=2)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
ggsave(file.path(phylo_distribution_dir, "big_tree.png"), base_fan_tree, device = "png", width = 15, height = 15, dpi = "retina")

ordered_species <- rev(pruned_tree$tip.label)
row.names(stats_df) <- stats_df$species

# Prepare trait data for tree visualization
tree_trait_data <- stats_df %>%
  group_by(species, taxa)

# Is there a branch color trait specified?
if (isTRUE(has.branch)) {
  tree_trait_data <- tree_trait_data %>%
    mutate(
      br_trait = .data[[branch_trait]]
    )

  # Perform ancestral state reconstruction for LQ trait
  br_trait_vec <- tree_trait_data$br_trait
  names(br_trait_vec) <- tree_trait_data$species
  br_asr_fit <- phytools::fastAnc(pruned_tree, br_trait_vec, vars = TRUE, CI = TRUE)

  # Create data frames for tip and node values
  tip_br_data <- data.frame(
    node = nodeid(pruned_tree, names(br_trait_vec)),
    BR = br_trait_vec
  )
  node_br_data <- data.frame(
    node = names(br_asr_fit$ace), 
    BR = br_asr_fit$ace
  )
  combined_br_data <- rbind(tip_br_data, node_br_data)
  combined_br_data$node <- as.numeric(combined_br_data$node)
}
  
# This section is problematic. We have three optional colums: p, branch_trait, and second_trait.
if (isTRUE(has.p) && isTRUE(has.secondary)) { # Now for both has.p and has.secondary TRUE
  tree_trait_data <- tree_trait_data %>%
    dplyr::rename(secondary_value = .data[[secondary_trait]]) %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      p_sum = sum(p, na.rm = TRUE),
      secondary_sum = sum(secondary_value, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    ungroup()
} else if (isTRUE(has.p)) {
  tree_trait_data <- tree_trait_data %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      p_sum = sum(p, na.rm = TRUE),
      .groups = "drop") %>%
    ungroup()
} else if (isTRUE(has.secondary)) { # If has.secondary are TRUE, we would need to adjust accordingly.
  tree_trait_data <- tree_trait_data %>%
    dplyr::rename(secondary_value = .data[[secondary_trait]]) %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      secondary_sum = sum(secondary_value, na.rm = TRUE),
      .groups = "drop") %>%
    ungroup()
} else { # If neither is TRUE  
  tree_trait_data <- tree_trait_data %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      .groups = "drop") %>%
    ungroup()
}
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `all_of(var)` (or `any_of(var)`) instead of `.data[[var]]`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Convert tree to tibble and join with trait data
tree_tibble <- tidytree::as_tibble(pruned_tree) %>%
  dplyr::rename(species = label)

tree_with_traits <- full_join(tree_tibble, tree_trait_data, by = 'species')

# Again, if branch color trait is present, join that data as well
if (isTRUE(has.branch)) {
  tree_with_traits <- full_join(tree_with_traits, combined_br_data, by = 'node')
}

print(tree_with_traits)
## # A tibble: 79 × 9
##    parent  node branch.length species  taxa  trait_sum p_sum secondary_sum    BR
##     <int> <dbl>         <dbl> <chr>    <fct>     <dbl> <int>         <dbl> <dbl>
##  1     47     1        10.4   Callimi… Cebi…    0.101     69        0.217  1.04 
##  2     49     2         3.48  Cebuell… Cebi…    0.0488    41        0.0976 1.16 
##  3     49     3         3.48  Mico_ar… Cebi…    0         20        0.05   0.846
##  4     50     4         0.665 Callith… Cebi…    0.0323    31        0.0968 0.903
##  5     50     5         0.665 Callith… Cebi…    0.0244    41        0.0244 1.24 
##  6     51     6         0.728 Leontop… Cebi…    0.0556    90        0.122  1.43 
##  7     51     7         0.728 Leontop… Cebi…    0.118     51        0.118  1.00 
##  8     53     8         3.25  Saguinu… Cebi…    0.0746   134        0.172  1.28 
##  9     54     9         1.53  Saguinu… Cebi…    0.04      25        0.04   0.909
## 10     54    10         1.53  Saguinu… Cebi…    0         22        0      0.984
## # ℹ 69 more rows
# Export dataframe for debug
write.csv(tree_with_traits, file.path(phylo_distribution_dir, "tree_with_traits.csv"), row.names = FALSE)

# Convert to treedata object for ggtree
tree_data_object <- tidytree::as.treedata(tree_with_traits)

# Create diagnostic plot showing node numbers and family colors
diagnostic_tree <- ggtree(tree_data_object,
                          layout = "fan",
                          open.angle = 15,
                          size = 2) +
  geom_text2(aes(label = node), hjust = -0.5, vjust = -0.5, size = 6) +
  aes(color = taxa) +
  color_scale
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
diagnostic_tree

# Save diagnostic tree
ggsave(file.path(phylo_distribution_dir, "diagnostic_tree.png"), diagnostic_tree, device = "png", width = 15, height = 15, dpi = "retina")

# If branch color trait is present, add it to the tree plot
if (isTRUE(has.branch)) {
  capitalized_branch_trait <- tools::toTitleCase(gsub("_", " ", branch_trait))
  # Create base tree with BR gradient
  tree_plot_step1 <- ggtree(tree_data_object, aes(color = BR),
                                  layout="fan",
                                  open.angle=15, 
                                  size=2) +
    scale_color_gradient(
      name = capitalized_branch_trait, 
      low = "skyblue", high = "salmon3",     
                        guide = guide_colorbar(order = 3)) +  # BR last
    new_scale_color()
} else {
  # Add trait bars with gradient
  tree_plot_step1 <- ggtree(tree_data_object,
                                  layout="fan",
                                  open.angle=15, 
                                  size=2)
}  
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
tree_plot_step1 <- tree_plot_step1 +
  # Trait bars
  geom_fruit(
    geom = geom_col,
    mapping = aes(y = species, x = trait_sum, fill = trait_sum, width = 0.5),
    alpha = 0.5,  # Set transparency for lighter appearance
    show.legend = TRUE,
    pwidth = 0.75,
    color = "black",  # Add black contour
    size = 0.7,
    axis.params = list(
      axis = 'x',
      text.size = 8,
      nbreak = 2,
      text.angle = 270,
      vjust = 0.5,
      hjust = 0,
      limits = c(0, 40)
    ),
    grid.params = list()
  ) +
  scale_fill_gradient2(
    name = capitalized_trait, 
    low = "white", 
    high = "darkseagreen4",  # Different gradient color for distinction
    guide = guide_colorbar(order = 1),
    aesthetics = "fill"
  ) +
  new_scale_fill()
## Warning in geom_col(data = structure(list(parent = c(47, 49, 49, 50, 50, :
## Ignoring unknown aesthetics: width
  # If secondary trait is present, add secondary trait bars
if (isTRUE(has.secondary)) {
  capitalized_secondary_trait <- tools::toTitleCase(gsub("_", " ", secondary_trait))

  tree_plot_step1 <- tree_plot_step1 +
    geom_fruit(
      geom = geom_col,
      offset = -0.75,
      mapping = aes(y = species, x = secondary_sum, fill = secondary_sum, width = 0.5),
      alpha = 0.5,  # Set transparency for lighter appearance
      show.legend = TRUE,
      pwidth = 0.75,
      color = "black",  # Add black contour
      size = 0.7,
      axis.params = list(
        axis = 'x',
        text.size = 0,
        nbreak = 1,
        text.angle = 270,
        vjust = 0.5,
        hjust = 0,
        limits = c(0, 20)
      ),
      grid.params = list()
    ) +
    scale_fill_gradient2(
      name = capitalized_secondary_trait, 
      low = "white", 
      high = "salmon3",  # Different gradient color for distinction
      guide = guide_colorbar(order = 2),
      aesthetics = "fill"
    ) +
    new_scale_fill()
}
## Warning in geom_col(data = structure(list(parent = c(47, 49, 49, 50, 50, :
## Ignoring unknown aesthetics: width
tree_plot_step1

# Add taxa membership indicator bars
tree_plot_step2 <- tree_plot_step1 +
  geom_fruit(geom = geom_col, 
             mapping = aes(y = species, x = 1, fill = taxa),
             pwidth = 0.1, color = "black", linewidth = 0.5, offset = 0) +
  scale_fill_manual(values = palette_values, breaks = 0) +
  new_scale_fill() +
  theme_tree() +
  theme(panel.background = element_rect(fill = "transparent", colour = NA),  
        plot.background = element_rect(fill = "transparent", colour = NA),
        legend.position = "none",
        plot.title = element_text(size = 17, margin = margin(t = 0, r = 0, b = -1.25, l = 0, unit = "cm")),
        plot.subtitle = element_text(size = 15, margin = margin(t = 0, r = 0, b = -1.5, l = 0, unit = "cm"))
  )

tree_plot_step2

# Add p labels (if applicable)
if(isTRUE(has.p)){
  tree_plot_step3 <- tree_plot_step2 +
    geom_text(aes(label = p_sum), nudge_x = 6.1, fontface = "bold", size = 6, vjust = 0.5)
} else {
  tree_plot_step3 <- tree_plot_step2
}

tree_plot_step3
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).

# Finalize tree with taxon labels and phylopic images
# Build taxon to tip mapping
tip_df <- find_taxon_mrca(pruned_tree, tree_with_traits)

# Get phylopic data
tip_df <- tip_df %>%
  rowwise() %>%  # Apply function row by row
  mutate(phylopic_UUID = rphylopic::get_uuid(name = taxa, n = 1))

# Save for debug
write.csv(tip_df, file.path(phylo_distribution_dir, "taxon_node_mapping.csv"), row.names = FALSE)

# Add family labels and phylopic images
final_tree_plot <- tree_plot_step3 +
  geom_cladelab(data = tip_df,
                mapping = aes(
                  node = mrca_node,
                  label = taxa,
                  image = phylopic_UUID,
                  color = taxa),
                geom="phylopic",
                barsize = NA,
                offset = 12,
                imagesize = 0.05,
                alpha = 0.75) +
  scale_color_manual(values = palette_values, breaks = 0) +
  geom_cladelab(data = tip_df,
                mapping = aes(
                  node = mrca_node,
                  label = taxa),
                show.legend = FALSE,
                          color = "black",
                angle = "auto",
                horizontal = TRUE,
                offset = 10,
                barsize = NA,
                fontsize = 9.5,
                fontface = "bold") +
  theme_tree() +
  theme(panel.background = element_rect(fill = "transparent", colour = NA),  
        plot.background = element_rect(fill = "transparent", colour = NA),
        legend.position = "right",
        legend.spacing.y = unit(1.5, 'cm'),
        legend.title = element_text(
          size = 15, face = "bold", 
          margin = margin(b = 15)),
        legend.key.size = unit(1, 'cm'),
        legend.text = element_text(size = 13),
        plot.title = element_text(size = 17, margin = margin(t = 0, r = 0, b = -1.25, l = 0, unit = "cm")),
        plot.subtitle = element_text(size = 15, margin = margin(t = 0, r = 0, b = -1.5, l = 0, unit = "cm"))
  )
## ! The "taxa" has(have) been found in tree data. You might need to rename the
## variable(s) in the data of "geom_cladelab" to avoid this warning!
## ! The "taxa" has(have) been found in tree data. You might need to rename the
## variable(s) in the data of "geom_cladelab" to avoid this warning!
final_tree_plot
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).

# Save progressive tree plots
ggsave(file.path(phylo_distribution_dir, "tree_with_trait_bars.png"), tree_plot_step1, device = "png", width = 15, height = 15, dpi = "retina")
ggsave(file.path(phylo_distribution_dir, "tree_with_taxa_bars.png"), tree_plot_step2, device = "png", width = 15, height = 15, dpi = "retina")
if(isTRUE(has.p)){
  ggsave(file.path(phylo_distribution_dir, "tree_with_p_labels.png"), tree_plot_step3, device = "png", width = 15, height = 15, dpi = "retina")
}
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggsave(file.path(phylo_distribution_dir, "final_tree_plot.png"), final_tree_plot, device = "png", width = 17, height = 15, dpi = "retina")
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Session Information

if (length(phylo_debug_log) > 0) {
  cat("### Debug log\n")
  cat(paste0("[DEBUG] ", phylo_debug_log, "\n"))
}

Debug log

[DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | malignant_prevalence | adult_necropsy_count | malignant_count | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | neoplasia_prevalence | LQ [DEBUG] seed_val = 1998 [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/47/c58fe8ab044531b921244e210c9e62 [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/47/c58fe8ab044531b921244e210c9e62/obj [DEBUG] resultsDir = data_exploration [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/47/c58fe8ab044531b921244e210c9e62 [DEBUG] Results Directory: data_exploration [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots [DEBUG] asr_trees = data_exploration/1.Data-exploration/3.ASR_trees [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/4.Phylogenetic_distribution [DEBUG] ci_dir = data_exploration/1.Data-exploration/5.CI_overlaps [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17 [DEBUG] trait_path = cancer_traits_processed-LQ.csv [DEBUG] trait_df rows = 47, cols = 19 [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ [DEBUG] trait_df species unique = 47 [DEBUG] tree_path = science.abn7829_data_s4.nex.tree [DEBUG] tree tips = 236, nodes = 235 [DEBUG] clade_name = primates [DEBUG] taxon_of_interest = family [DEBUG] trait = malignant_prevalence [DEBUG] n_trait = malignant_count [DEBUG] p_trait = adult_necropsy_count [DEBUG] has.p = TRUE, p missing = 0 [DEBUG] has.n = TRUE, n missing = 0 [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv [DEBUG] tax_id_df rows = 1290, distinct taxa = 807 [DEBUG] has.TAX_ID = TRUE [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0 [DEBUG] normalized tax_id from merged columns, missing tax_id = 0 [DEBUG] tree_ids rows = 40, missing tax_id = 0 [DEBUG] common_tax_ids = 40 [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39 [DEBUG] trait_df after TAX_ID tree filter rows = 40 [DEBUG] secondary_trait = neoplasia_prevalence [DEBUG] has.secondary = TRUE, missing = 0 [DEBUG] branch_trait = LQ [DEBUG] has.branch = TRUE, missing = 0 [DEBUG] createDir: created data_exploration/1.Data-exploration/2.Extreme_plots [DEBUG] createDir: created data_exploration/1.Data-exploration/3.ASR_trees [DEBUG] createDir: created data_exploration/1.Data-exploration/4.Phylogenetic_distribution [DEBUG] contrast_plot.f using p_trait = adult_necropsy_count, missing p = 0 [DEBUG] contrast_plot.f ordered_species = 40 [DEBUG] contrast_plot.f rows = 40, trait = malignant_prevalence, taxon = family [DEBUG] contrast_plot.f columns: species, family, malignant_prevalence, malignant_count, adult_necropsy_count, neoplasia_prevalence, LQ, taxa_mean, taxa_median, taxa_sd, taxa_q25, taxa_q75, value, g_mean, g_median, g_sd, g_q25, g_q75, outlier, extreme_outlier, global_label, taxa_outlier, extreme_taxa_outlier, taxa_label, taxa, common_name, p [DEBUG] violin_extremes.f rows = 40, trait = malignant_prevalence, taxon = family [DEBUG] violin_extremes.f ordered_species = 40 [DEBUG] asr_tree.f input rows = 40, trait_vec = 40 [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39 [DEBUG] asr_tree.f trait_vec after prune = 40 [DEBUG] asr_tree.f df after prune = 40 [DEBUG] asr_tree.f fit_bm aicc = -92.471, fit_lambda aicc = -110.169 [DEBUG] asr_tree.f chosen_model = lambda [DEBUG] asr_tree.f lambda_hat = 0.7224 [DEBUG] asr_tree.f sigma2_used = 0.000090 [DEBUG] asr_tree.f node_est = 39, tip_est = 40 [DEBUG] asr_tree.f contMap range = [0.0000, 0.2456] [DEBUG] asr_tree.f input rows = 40, trait_vec = 40 [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39 [DEBUG] asr_tree.f trait_vec after prune = 40 [DEBUG] asr_tree.f df after prune = 40 [DEBUG] asr_tree.f fit_bm aicc = -92.471, fit_lambda aicc = -110.169 [DEBUG] asr_tree.f chosen_model = lambda [DEBUG] asr_tree.f lambda_hat = 0.7224 [DEBUG] asr_tree.f sigma2_used = 0.000090 [DEBUG] asr_tree.f node_est = 39, tip_est = 40 [DEBUG] asr_tree.f contMap range = [0.0000, 0.2456]

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-conda-linux-gnu
## Running under: TUXEDO OS
## 
## Matrix products: default
## BLAS/LAPACK: /home/miguel/micromamba/envs/caas_global_cancer/lib/libopenblasp-r0.3.28.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Madrid
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rphylopic_1.5.0    geiger_2.0.11      phytools_2.4-4     maps_3.4.2.1      
##  [5] ggtreeExtra_1.16.0 phylolm_2.6.5      tidytree_0.4.6     treeio_1.30.0     
##  [9] colorspace_2.1-1   ggstar_1.0.4       ggnewscale_0.5.1   ggtree_3.14.0     
## [13] ggrepel_0.9.6      reshape2_1.4.4     ggplot2_3.5.1      ape_5.8-1         
## [17] tidyr_1.3.1        dplyr_1.1.4        readr_2.1.5       
## 
## loaded via a namespace (and not attached):
##  [1] mnormt_2.1.1            pbapply_1.7-2           gridExtra_2.3          
##  [4] phangorn_2.12.1         rlang_1.1.5             magrittr_2.0.3         
##  [7] compiler_4.4.1          png_0.1-8               systemfonts_1.2.1      
## [10] vctrs_0.6.5             combinat_0.0-8          quadprog_1.5-8         
## [13] stringr_1.5.1           crayon_1.5.3            pkgconfig_2.0.3        
## [16] fastmap_1.2.0           magick_2.8.6            labeling_0.4.3         
## [19] utf8_1.2.4              subplex_1.9             deSolve_1.40           
## [22] rmarkdown_2.29          tzdb_0.5.0              ragg_1.3.3             
## [25] purrr_1.0.4             xfun_0.51               cachem_1.1.0           
## [28] aplot_0.2.5             clusterGeneration_1.3.8 jsonlite_1.9.1         
## [31] jpeg_0.1-11             parallel_4.4.1          R6_2.6.1               
## [34] bslib_0.9.0             stringi_1.8.4           parallelly_1.43.0      
## [37] jquerylib_0.1.4         numDeriv_2016.8-1.1     Rcpp_1.0.14            
## [40] iterators_1.0.14        knitr_1.50              future.apply_1.11.3    
## [43] optimParallel_1.0-2     base64enc_0.1-3         Matrix_1.7-3           
## [46] igraph_2.1.4            tidyselect_1.2.1        yaml_2.3.10            
## [49] doParallel_1.0.17       codetools_0.2-20        curl_6.2.2             
## [52] listenv_0.9.1           lattice_0.22-6          tibble_3.2.1           
## [55] plyr_1.8.9              withr_3.0.2             ggimage_0.3.3          
## [58] coda_0.19-4.1           evaluate_1.0.3          gridGraphics_0.5-1     
## [61] future_1.34.0           pillar_1.10.1           foreach_1.5.2          
## [64] ggfun_0.1.8             generics_0.1.3          grImport2_0.3-3        
## [67] hms_1.1.3               munsell_0.5.1           scales_1.3.0           
## [70] globals_0.16.3          glue_1.8.0              scatterplot3d_0.3-44   
## [73] lazyeval_0.2.2          tools_4.4.1             fs_1.6.5               
## [76] mvtnorm_1.3-3           XML_3.99-0.18           fastmatch_1.1-6        
## [79] grid_4.4.1              nlme_3.1-167            patchwork_1.3.0        
## [82] cli_3.6.4               DEoptim_2.2-8           textshaping_1.0.0      
## [85] rsvg_2.6.1              expm_1.0-0              gtable_0.3.6           
## [88] yulab.utils_0.2.0       sass_0.4.9              digest_0.6.37          
## [91] ggplotify_0.1.2         farver_2.1.2            htmltools_0.5.8.1      
## [94] lifecycle_1.0.4         httr_1.4.7              MASS_7.3-65